From 6c94cfd1491fb8cade2f5102cfca09ae4b05e900 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Mon, 7 Feb 2005 16:00:00 +0000 Subject: [PATCH] bitkeeper revision 1.1159.212.110 (42079080u5EKN2Dp7MbOEM7lnEs4gg) Various bug fixes, and NMI/DF improvements for x86_64. Signed-off-by: keir.fraser@cl.cam.ac.uk --- xen/arch/x86/boot/x86_32.S | 4 +- xen/arch/x86/boot/x86_64.S | 4 +- xen/arch/x86/setup.c | 10 ++--- xen/arch/x86/smpboot.c | 24 ++++------- xen/arch/x86/traps.c | 84 +++++++++++++++++++++++------------- xen/arch/x86/x86_32/entry.S | 22 +++++++--- xen/arch/x86/x86_32/traps.c | 45 ++++++++++--------- xen/arch/x86/x86_64/entry.S | 12 ++++-- xen/arch/x86/x86_64/traps.c | 80 +++++++++++++++++++++------------- xen/include/asm-x86/config.h | 4 ++ 10 files changed, 170 insertions(+), 119 deletions(-) diff --git a/xen/arch/x86/boot/x86_32.S b/xen/arch/x86/boot/x86_32.S index 0b15876e96..3ed99b6f4c 100644 --- a/xen/arch/x86/boot/x86_32.S +++ b/xen/arch/x86/boot/x86_32.S @@ -214,7 +214,7 @@ ENTRY(gdt_table) .org 0x1000 ENTRY(idle_pg_table) # Initial page directory is 4kB .org 0x2000 -ENTRY(cpu0_stack) # Initial stack is 8kB - .org 0x4000 +ENTRY(cpu0_stack) + .org 0x2000 + STACK_SIZE ENTRY(stext) ENTRY(_stext) diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index 319f18a54b..d3414a3816 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -243,8 +243,8 @@ ENTRY(idle_pg_table_l2) identmap /* Too orangey for crows :-) */ .org 0x4000 -ENTRY(cpu0_stack) # Initial stack is 8kB +ENTRY(cpu0_stack) - .org 0x6000 + .org 0x4000 + STACK_SIZE ENTRY(stext) ENTRY(_stext) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index f8595633cc..8c699b4586 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -298,19 +298,21 @@ void __init identify_cpu(struct cpuinfo_x86 *c) unsigned long cpu_initialized; void __init cpu_init(void) { - extern void percpu_traps_init(void); int nr = smp_processor_id(); struct tss_struct 
*t = &init_tss[nr]; + unsigned char idt_load[10]; if ( test_and_set_bit(nr, &cpu_initialized) ) panic("CPU#%d already initialized!!!\n", nr); printk("Initializing CPU#%d\n", nr); - /* Set up GDT and IDT. */ SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES); SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS); __asm__ __volatile__ ( "lgdt %0" : "=m" (*current->arch.gdt) ); - __asm__ __volatile__ ( "lidt %0" : "=m" (idt_descr) ); + + *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1; + *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[nr]; + __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); /* No nested task. */ __asm__ __volatile__ ( "pushf ; andw $0xbfff,(%"__OP"sp) ; popf" ); @@ -336,8 +338,6 @@ void __init cpu_init(void) CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); #undef CD - percpu_traps_init(); - /* Install correct page table. */ write_ptbase(current); diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index fcf7d64646..6cf023fa17 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -388,34 +388,28 @@ static int cpucount; void __init start_secondary(void) { unsigned int cpu = cpucount; - /* 6 bytes suitable for passing to LIDT instruction. */ - unsigned char idt_load[6]; + extern void percpu_traps_init(void); extern void cpu_init(void); set_current(idle_task[cpu]); /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. + * At this point, boot CPU has fully initialised the IDT. It is + * now safe to make ourselves a private copy. */ + idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); + memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t)); + + percpu_traps_init(); + cpu_init(); + smp_callin(); while (!atomic_read(&smp_commenced)) rep_nop(); - /* - * At this point, boot CPU has fully initialised the IDT. It is - * now safe to make ourselves a private copy. 
- */ - idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES); - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t)); - *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1; - *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu]; - __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) ); - /* * low-memory mappings have been cleared, flush them from the local TLBs * too. diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 9cd16fdb27..18f4948608 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -553,19 +553,55 @@ asmlinkage int do_general_protection(struct xen_regs *regs) return 0; } +unsigned long nmi_softirq_reason; +static void nmi_softirq(void) +{ + if ( dom0 == NULL ) + return; + + if ( test_and_clear_bit(0, &nmi_softirq_reason) ) + send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR); + + if ( test_and_clear_bit(1, &nmi_softirq_reason) ) + send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR); +} + asmlinkage void mem_parity_error(struct xen_regs *regs) { - console_force_unlock(); - printk("\n\nNMI - MEMORY ERROR\n"); - fatal_trap(TRAP_nmi, regs); + /* Clear and disable the parity-error line. */ + outb((inb(0x61)&15)|4,0x61); + + switch ( opt_nmi[0] ) + { + case 'd': /* 'dom0' */ + set_bit(0, &nmi_softirq_reason); + raise_softirq(NMI_SOFTIRQ); + case 'i': /* 'ignore' */ + break; + default: /* 'fatal' */ + console_force_unlock(); + printk("\n\nNMI - MEMORY ERROR\n"); + fatal_trap(TRAP_nmi, regs); + } } asmlinkage void io_check_error(struct xen_regs *regs) { - console_force_unlock(); + /* Clear and disable the I/O-error line. 
*/ + outb((inb(0x61)&15)|8,0x61); - printk("\n\nNMI - I/O ERROR\n"); - fatal_trap(TRAP_nmi, regs); + switch ( opt_nmi[0] ) + { + case 'd': /* 'dom0' */ + set_bit(1, &nmi_softirq_reason); /* bit 1 = I/O error, as tested in nmi_softirq() */ + raise_softirq(NMI_SOFTIRQ); + case 'i': /* 'ignore' */ + break; + default: /* 'fatal' */ + console_force_unlock(); + printk("\n\nNMI - I/O ERROR\n"); + fatal_trap(TRAP_nmi, regs); + } } static void unknown_nmi_error(unsigned char reason) @@ -579,25 +615,15 @@ asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason) { ++nmi_count(smp_processor_id()); -#if CONFIG_X86_LOCAL_APIC if ( nmi_watchdog ) nmi_watchdog_tick(regs); - else -#endif - unknown_nmi_error((unsigned char)(reason&0xff)); -} - -unsigned long nmi_softirq_reason; -static void nmi_softirq(void) -{ - if ( dom0 == NULL ) - return; - - if ( test_and_clear_bit(0, &nmi_softirq_reason) ) - send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR); - if ( test_and_clear_bit(1, &nmi_softirq_reason) ) - send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR); + if ( reason & 0x80 ) + mem_parity_error(regs); + else if ( reason & 0x40 ) + io_check_error(regs); + else if ( !nmi_watchdog ) + unknown_nmi_error((unsigned char)(reason&0xff)); } asmlinkage int math_state_restore(struct xen_regs *regs) @@ -706,8 +732,8 @@ void set_tss_desc(unsigned int n, void *addr) void __init trap_init(void) { - extern void doublefault_init(void); - doublefault_init(); + extern void percpu_traps_init(void); + extern void cpu_init(void); /* * Note that interrupt gates are always used, rather than trap gates. We @@ -745,13 +771,9 @@ void __init trap_init(void) /* CPU0 uses the master IDT. */ idt_tables[0] = idt_table; - /* - * Should be a barrier for any external CPU state.
- */ - { - extern void cpu_init(void); - cpu_init(); - } + percpu_traps_init(); + + cpu_init(); open_softirq(NMI_SOFTIRQ, nmi_softirq); } diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index d9a084ba95..fd10779f5d 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -596,7 +596,7 @@ ENTRY(nmi) # Okay, its almost a normal NMI tick. We can only process it if: # A. We are the outermost Xen activation (in which case we have # the selectors safely saved on our stack) - # B. DS-GS all contain sane Xen values. + # B. DS and ES contain sane Xen values. # In all other cases we bail without touching DS-GS, as we have # interrupted an enclosing Xen activation in tricky prologue or # epilogue code. @@ -644,11 +644,11 @@ nmi_parity_err: orb $0x4,%al outb %al,$0x61 cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore - je restore_all_xen + je nmi_out bts $0,%ss:SYMBOL_NAME(nmi_softirq_reason) bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 - je restore_all_xen + je nmi_out movl $(__HYPERVISOR_DS),%edx # nmi=fatal movl %edx,%ds movl %edx,%es @@ -656,7 +656,15 @@ nmi_parity_err: push %edx call SYMBOL_NAME(mem_parity_error) addl $4,%esp - jmp ret_from_intr +nmi_out:movl %ss:XREGS_eflags(%esp),%eax + movb %ss:XREGS_cs(%esp),%al + testl $(3|X86_EFLAGS_VM),%eax + jz restore_all_xen + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + jmp test_all_events nmi_io_err: # Clear and disable the I/O-error line @@ -664,11 +672,11 @@ nmi_io_err: orb $0x8,%al outb %al,$0x61 cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore - je restore_all_xen + je nmi_out bts $1,%ss:SYMBOL_NAME(nmi_softirq_reason) bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 - je restore_all_xen + je nmi_out movl $(__HYPERVISOR_DS),%edx # nmi=fatal movl %edx,%ds movl %edx,%es @@ -676,7 +684,7 @@ nmi_io_err: push %edx call SYMBOL_NAME(io_check_error) addl $4,%esp - jmp 
ret_from_intr + jmp nmi_out ENTRY(setup_vm86_frame) diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index ec11beb5d0..16c857cdb0 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -175,34 +175,33 @@ asmlinkage void do_double_fault(void) __asm__ __volatile__ ( "hlt" ); } -void __init doublefault_init(void) +void __init percpu_traps_init(void) { - /* - * Make a separate task for double faults. This will get us debug output if - * we blow the kernel stack. - */ - struct tss_struct *tss = &doublefault_tss; - memset(tss, 0, sizeof(*tss)); - tss->ds = __HYPERVISOR_DS; - tss->es = __HYPERVISOR_DS; - tss->ss = __HYPERVISOR_DS; - tss->esp = (unsigned long) - &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - tss->__cr3 = __pa(idle_pg_table); - tss->cs = __HYPERVISOR_CS; - tss->eip = (unsigned long)do_double_fault; - tss->eflags = 2; - tss->bitmap = IOBMP_INVALID_OFFSET; - _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, - (unsigned long)tss, 235, 9); + if ( smp_processor_id() == 0 ) + { + /* + * Make a separate task for double faults. This will get us debug + * output if we blow the kernel stack. 
+ */ + struct tss_struct *tss = &doublefault_tss; + memset(tss, 0, sizeof(*tss)); + tss->ds = __HYPERVISOR_DS; + tss->es = __HYPERVISOR_DS; + tss->ss = __HYPERVISOR_DS; + tss->esp = (unsigned long) + &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + tss->__cr3 = __pa(idle_pg_table); + tss->cs = __HYPERVISOR_CS; + tss->eip = (unsigned long)do_double_fault; + tss->eflags = 2; + tss->bitmap = IOBMP_INVALID_OFFSET; + _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, + (unsigned long)tss, 235, 9); + } set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3); } -void __init percpu_traps_init(void) -{ -} - long set_fast_trap(struct exec_domain *p, int idx) { trap_info_t *ti; diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index ad1544092b..7dd3fd342c 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -12,9 +12,9 @@ #include ENTRY(hypercall) - movl $0x0833,8(%rsp) + movl $__GUEST_SS,8(%rsp) pushq %r11 - pushq $0x082b + pushq $__GUEST_CS pushq %rcx pushq $0 SAVE_ALL @@ -133,7 +133,13 @@ ENTRY(double_fault) jmp error_code ENTRY(nmi) - iretq + pushq $0 + SAVE_ALL + inb $0x61,%al + movl %eax,%esi # reason + movq %rsp,%rdi # regs (full 64-bit pointer; movl would truncate it) + call SYMBOL_NAME(do_nmi) + jmp restore_all_xen .data diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index 1460058f5e..323ea6865c 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -129,10 +129,7 @@ void show_page_walk(unsigned long addr) printk(" L1 = %p\n", page); } -#define DOUBLEFAULT_STACK_SIZE 1024 -static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; asmlinkage void double_fault(void); - asmlinkage void do_double_fault(struct xen_regs *regs) { /* Disable the NMI watchdog. It's useless now. */ @@ -142,19 +139,9 @@ asmlinkage void do_double_fault(struct xen_regs *regs) /* Find information saved during fault and dump it to the console.
*/ printk("************************************\n"); - printk("EIP: %04lx:[<%p>] \nEFLAGS: %p\n", - 0xffff & regs->cs, regs->rip, regs->eflags); - printk("rax: %p rbx: %p rcx: %p rdx: %p\n", - regs->rax, regs->rbx, regs->rcx, regs->rdx); - printk("rsi: %p rdi: %p rbp: %p rsp: %p\n", - regs->rsi, regs->rdi, regs->rbp, regs->rsp); - printk("r8: %p r9: %p r10: %p r11: %p\n", - regs->r8, regs->r9, regs->r10, regs->r11); - printk("r12: %p r13: %p r14: %p r15: %p\n", - regs->r12, regs->r13, regs->r14, regs->r15); + show_registers(regs); printk("************************************\n"); - printk("CPU%d DOUBLE FAULT -- system shutdown\n", - logical_smp_processor_id()); + printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id()); printk("System needs manual reset.\n"); printk("************************************\n"); @@ -166,25 +153,29 @@ asmlinkage void do_double_fault(struct xen_regs *regs) __asm__ __volatile__ ( "hlt" ); } -void __init doublefault_init(void) -{ - int i; - - /* Initialise IST1 for each CPU. Note the handler is non-reentrant. */ - for ( i = 0; i < NR_CPUS; i++ ) - init_tss[i].ist[0] = (unsigned long) - &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - - /* Set interrupt gate for double faults, specifying IST1. */ - set_intr_gate(TRAP_double_fault, &double_fault); - idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */ -} - asmlinkage void hypercall(void); void __init percpu_traps_init(void) { char *stack_top = (char *)get_stack_top(); char *stack = (char *)((unsigned long)stack_top & ~(STACK_SIZE - 1)); + int cpu = smp_processor_id(); + + /* Double-fault handler has its own per-CPU 1kB stack. */ + init_tss[cpu].ist[0] = (unsigned long)&stack[1024]; + set_intr_gate(TRAP_double_fault, &double_fault); + idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */ + + /* NMI handler has its own per-CPU 1kB stack. 
*/ + init_tss[cpu].ist[1] = (unsigned long)&stack[2048]; + idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */ + + /* + * Trampoline for SYSCALL entry from long mode. + */ + + /* Skip the NMI and DF stacks. */ + stack = &stack[2048]; + wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32)); /* movq %rsp, saversp(%rip) */ stack[0] = 0x48; @@ -202,9 +193,36 @@ void __init percpu_traps_init(void) stack[14] = 0xe9; *(u32 *)&stack[15] = (char *)hypercall - &stack[19]; + /* + * Trampoline for SYSCALL entry from compatibility mode. + */ + + /* Skip the long-mode entry trampoline. */ + stack = &stack[19]; + wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32)); + + /* movq %rsp, saversp(%rip) */ + stack[0] = 0x48; + stack[1] = 0x89; + stack[2] = 0x25; + *(u32 *)&stack[3] = (stack_top - &stack[7]) - 16; + + /* leaq saversp(%rip), %rsp */ + stack[7] = 0x48; + stack[8] = 0x8d; + stack[9] = 0x25; + *(u32 *)&stack[10] = (stack_top - &stack[14]) - 16; + + /* jmp hypercall */ + stack[14] = 0xe9; + *(u32 *)&stack[15] = (char *)hypercall - &stack[19]; + + /* + * Common SYSCALL parameters. + */ + wrmsr(MSR_STAR, 0, (FLAT_RING3_CS64<<16) | __HYPERVISOR_CS); - wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32)); - wrmsr(MSR_SYSCALL_MASK, 0xFFFFFFFFU, 0U); + wrmsr(MSR_SYSCALL_MASK, 0xFFFFFFFFU, 0U); /* mask bits are CLEARED in RFLAGS on SYSCALL: disables interrupts (IF) along with all other flags */ } void *decode_reg(struct xen_regs *regs, u8 b) diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 78fd023a76..377d18975c 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -191,6 +191,10 @@ extern void __out_of_line_bug(int line) __attribute__((noreturn)); #define __HYPERVISOR_DS32 0x0818 #define __HYPERVISOR_DS __HYPERVISOR_DS64 +#define __GUEST_CS 0x082b +#define __GUEST_DS 0x0000 +#define __GUEST_SS 0x0833 + /* For generic assembly code: use macros to define operation/operand sizes. */ #define __OS "q" /* Operation Suffix */ #define __OP "r" /* Operand Prefix */ -- 2.30.2